## Stephen Moon
## Statistics Thesis

# Setup ####

library(stargazer)


# Load the race-level data and prepare it for the summary table below.
# NOTE(review): setwd() with an absolute, user-specific path makes the script
# non-portable. It is kept because the relative paths below (and possibly the
# sourced helpers) are resolved against this directory.
setwd("/Users/smoon/Desktop/Thesis/Data/")

source("ThesisHelperFunctions.R")

results <- read.csv("RacesWithDIME.csv", header = TRUE, as.is = TRUE)
results$year.x <- as.factor(results$year.x)
results$partyControl <- as.factor(results$partyControl)

# Drop the 1980 races. A logical mask is used instead of `-which(...)`:
# if no row matches, `-which(...)` is `integer(0)` and the subset would
# silently return ZERO rows rather than the full data frame. `%in%` never
# yields NA, so rows with a missing year are kept, exactly as which() did.
results <- results[!(results$year.x %in% "1980"), ]
N <- nrow(results)



# Columns of `results` to summarise, in the order they appear in the table.
variables <- c("abroadPrcnt", "recentArrivalPrcnt", "totalPopBirthPlace", 
               "prcntForeignBorn", "prcntExAliens", "totalHouseholds",
               "under10k", "over10k", "over15k", "over25k", "over35k",
               "over50k", "over75k", "over100k", "over150k", "over200k",
               "meanIncome", "medianIncome", "prcntUnemp","prcntNotEmploy",
               "prcntBA", "prcntHS", "prcntAsian", "prcntBlack", "prcntBlackNotHisp",
               "prcntHisp", "prcntMulti", "prcntWhite", "prcntWhiteAll",
               "prcntNotHisp", "prcntOld", "medianAge", "gini", "dwnom1")
# Display labels, paired by position with `variables`.
# NOTE: if you add a variable above you MUST add it here too
goodNames <- c("abroad pct.", "recent arrival pct.", "total population, birthplace",
              "foreign born pct.", "ex aliens pct.", "total households",
              "under 10k", "over 10k", "over 15k", "over 25k", "over 35k",
              "over 50k", "over 75k", "over 100k", "over 150k", "over 200k",
              "mean income", "median income", "unemployment pct.", "pct. not employed",
              "bachelor\'s degree pct.", "HS degree pct.", "pct. Asian", "pct. Black", "pct. non-Hispanic Black",
              "pct. Hispanic", "pct. multi-racial", "pct. White", "pct. all Whites",
              "pct. non-Hispanic", "pct. old", "median age", "gini", "DW-Nominate 1")
n <- length(variables)

# Enforce the pairing described in the NOTE above, so a forgotten label is
# caught here rather than when the summary table is assembled.
stopifnot(length(goodNames) == n)

# Column position of each variable in `results`. match() is vectorized and
# returns NA for an absent name, which lets us report every missing column
# at once instead of failing on the first with an opaque
# "replacement has length zero" error.
indices <- match(variables, names(results))
missing_vars <- variables[is.na(indices)]
if (length(missing_vars) > 0) {
  stop("Variables not found in `results`: ",
       paste(missing_vars, collapse = ", "), call. = FALSE)
}

# Indicator of uncontested races; splits every variable into two groups.
R <- results$uncontested
# Coerce to logical before using it as a subscript. If the column was read
# as 0/1 integers, `col[R]` would index by POSITION (0 and 1) instead of
# masking rows. For a column that is already logical this is a no-op.
# NOTE(review): assumes `uncontested` is logical or 0/1 -- confirm against the CSV.
is_uncontested <- as.logical(R)

# Per-variable results, preallocated as numeric vectors:
# means over all races, contested races, uncontested races, and the
# p-value of the two-sample t-test between the two groups.
overall <- numeric(n)
contested <- numeric(n)
uncontested <- numeric(n)
pval <- rep(NA_real_, n)

for (i in seq_len(n)) {
  col <- results[[indices[i]]]
  con <- col[!is_uncontested]
  uncon <- col[is_uncontested]

  overall[i] <- mean(col, na.rm = TRUE)
  contested[i] <- mean(con, na.rm = TRUE)
  uncontested[i] <- mean(uncon, na.rm = TRUE)

  # t.test() has no `na.rm` argument (it drops NAs itself), so none is passed.
  # When the test cannot be computed (error) or raises a warning, record
  # NA_real_ rather than "", which would coerce all of `pval` to character.
  result <- tryCatch(
    t.test(con, uncon)$p.value,
    warning = function(w) NA_real_,
    error = function(e) NA_real_
  )
  pval[i] <- result
}

# Assemble one row per variable: its display label followed by the mean over
# all races, contested races, and uncontested races.
table <- data.frame(
  variable = goodNames,
  overall = overall,
  contested = contested,
  uncontested = uncontested
)

# Render the data frame itself (summary = FALSE) without row names, using
# one decimal place, and attach the caption about the p-values.
stargazer(
  table,
  summary = FALSE,
  rownames = FALSE,
  digits = 1,
  caption = "All difference-in-means p-values (except where data are not available) are below 0.01."
)
